This script integrates the proteomics (TMT) dataset with the transcriptomic (RNAseq) space in order to explore the proteomic signature of the archetypes and compare their proteomic and transcriptomic profiles.
The integration is carried out by aligning the proteomic and the transcriptomic spaces using Procrustes superimposition.
In order to end up with the proteomics profile of the archetypes it makes sense to define the proteomics as the target matrix (X) and then superimpose the GE matrix (Y) to match the proteomics space. The archetypes position can then be imputed in the proteomics space. Then the proteomic profile of the archetypes can be calculated similarly to the way the gene expression profiles were caculated in 1_fit_archetypes.Rmd, by finding proteins whose abundance decreases significantly with distance from archetype position (or with limma).
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
knitr::opts_chunk$set(cache=TRUE, cache.extra = R.version)
suppressPackageStartupMessages({
library(ggplot2)
library(ggfortify)
library(GGally)
library(cowplot)
library(Matrix)
library(tidyverse)
library(pins)
library(grid)
library(vegan)
})
# A function for predicting new data from procrustes fit object regardless of scaling
# Modified from vegan::predict.procrustes
# y.cs is centroid size of the original matrix (Y)
# y.means is the centroid of the original matrix (Y)
predict.proc <- function (object, newdata, y.cs=NULL, y.mean) {
Y <- as.matrix(newdata)
Y <- Y - matrix(y.mean, byrow=TRUE, nr=nrow(Y), nc=length(y.mean))
if (object$symmetric & !is.null(y.cs)) Y <- Y/y.cs
Y <- (object$scale * Y) %*% object$rotation
colnames(Y) <- colnames(newdata)
Y
}
Loading clean gene expression and archetyppe data from the first step (1_fit_archetypes).
Loading proteomics data from Emory (TMT-labeled MS/MS-based; A & T Wingo).
# key between proteomics ID and project ID (from T&A Wingo)
protkey <- read_csv("data/metadata/proteomics_ROS_MAP_TRAITS_clean.csv",
col_types=cols(.default = "c")) %>%
dplyr::select(-Batch)
# Load metadata with archetype classification and distances
arch.meta <- readRDS("analyses/rnaseq/1_fit_archetypes/archetypes_meta_k3p20_AD.RDS") %>%
merge(protkey, by="projid", all.x=TRUE, all.y=FALSE) %>%
arrange(rnaseq_id)
# Load Emory's proteomics data, transpose so that samples are rows, match with rnaseq_id and make it rownames
data.pt <- read_csv("data/proteomics_n391_residual_log2_batchMSsexPMIageStudy.csv") %>%
column_to_rownames("X1") %>%
drop_na() %>% # remove proteins with any na values
t() %>% as.data.frame() %>%
rownames_to_column(var="proteomicsid") %>%
merge(select(arch.meta, rnaseq_id, proteomicsid), by="proteomicsid", all=FALSE) %>%
arrange(rnaseq_id) %>%
select(-proteomicsid) %>%
column_to_rownames("rnaseq_id")
# Load gene expression data for all samples and the three archetypes
arch.ge <- readRDS("analyses/rnaseq/1_fit_archetypes/archetypes_ge_k3p20_AD.RDS")
# Load gene expression PCA for all samples and the three archetypes
arch.pca <- readRDS("analyses/rnaseq/1_fit_archetypes/archetypes_pca_k3p20_AD.RDS")
varpc <- 100 * arch.pca[["all.pca"]]$sdev^2 / sum(arch.pca[["all.pca"]]$sdev^2) %>%
round(2)
arch.pca <- arch.pca$arch.pca %>%
as.data.frame() %>%
rownames_to_column(var="rnaseq_id")
# Load top genes associated with each archetype
topgenes <- readRDS("analyses/rnaseq/2b_topgenes_annotations/topgenes.anno.RDS")
# Filter arch.meta to include only the intersection between ge and pt
meta.pt <- arch.meta %>%
filter(rnaseq_id %in% rownames(data.pt))
# Load arcfit object
arcfit <- readRDS("analyses/rnaseq/1_fit_archetypes/arcfit_k3.RDS")[["AD"]][["p20"]]
# Load logFC for all genes
LogFC.ge <- readRDS("analyses/rnaseq/1_fit_archetypes/logFC_allres_k3p20_AD.RDS")
archnames <- c("Archetype_1", "Archetype_2", "Archetype_3")
Cge.arch <- arch.pca%>%
filter( rnaseq_id %in% archnames) %>%
column_to_rownames("rnaseq_id")
Cge <- arch.pca %>%
filter(! rnaseq_id %in% archnames)
Cge %>%
mutate(Archetype = arch.meta$Archetype, Diagnosis = arch.meta$Diagnosis) %>%
ggplot() +
geom_point(aes(x=PC1, y=PC2, color=Archetype, shape=Diagnosis)) +
geom_point(data=Cge.arch, aes(x=PC1, y=PC2), color="black", size=10, shape="*") +
geom_label(data=Cge.arch, aes(x=PC1, y=PC2, label=c("1","2","3")), nudge_y = 3, size=3) +
xlab(paste0("PC1 (", varpc[1], "%)")) +
ylab(paste0("PC2 (", varpc[2], "%)")) +
ggtitle("Transcriptomics PCA")
# extract samples and archetypes that are not in the proteomic data ("new data" for the predict function)
Cge.nd <- Cge %>%
filter(! rnaseq_id %in% rownames(data.pt)) %>%
column_to_rownames("rnaseq_id")
# extract samples and archetypes that occur in the proteomic data
Cge <- Cge %>%
filter(rnaseq_id %in% rownames(data.pt)) %>%
column_to_rownames("rnaseq_id")
PCpt <- prcomp(data.pt)
autoplot(PCpt,
data = meta.pt,
colour = 'Archetype',
main="Proteomics PCA")
Cpt <- PCpt$x[,1:20]
Making sure samples are in the same order in both spaces
order(match(rownames(Cpt), rownames(Cge)))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
Superimposing two configurations (ordinations in this case) with Procrustes superimposition involves translating them to the origin, scaling them to a common scale, and rotating one matrix (Y) onto the other (X, the target matrix). The roration matrix is found by decomposing the crossproduct of X and Y. The scaling is controlled by two parameters in the vegan::procrustes() function: scale and symmetric.
The code for vegan::procrustes() indicates the following:
symmetric==TRUE: both matrices are scaled by their centroid size (variance*(n-1)); otherwise the two matrices are translated and Y is rotated without any scaling.
scale==TRUE: the rotated Y matrix is scaled by the centroid size of the crossproduct, while X isn’t scaled at all; otherwise, if symmetric==TRUE both matrices end up with centroid size 1, if symmetric==FALSE both retain their original scale.
Using scale=FALSE and symmetric==FALSE (partial PS) is not sensible here because it does not put the spaces in a comparable scale, and the original scale isn’t meaningful here in itself (unlike size when objects like skulls are compared). We want the archetypes proteomic imputations to be comparable to the protemic profiles of the real patients.
Using scale=FALSE and symmetric==TRUE (full PS) is the usual practice in geometric morphometrics and preserve the procrustes distance between the two configurations as both matrices end up scaled to centroid size 1. However, predictions will not be in the original proteomics scale, which is what we want here eventually.
Using scale=TRUE and symmetric==FALSE is the only option for predict.procrustes(). X is in its original scale, Y is scaled by the crossproduct. It should be possible in principle to do it with symmetric PS (as often done in GM), but then it would require another step to translate the archetype imputations to the original proteomics space to get their proteomic profiles.
Using scale=TRUE and symmetric==TRUE is the only option for protest. It is said in the manual that it minimizes residuals compared to scale==FALSE and symmetric==TRUE, but it is probably more sensitive to samples that are outliers in one space but not in the other (dispersing the differences more “equaly” among the non-outliers, dragging everything else towards the outliers).
Eventually, the results are always presented in the coordinate basis of the target, with both matrices translated to the origin. The target matrix may be scaled depending on the above parameters but not rotated.
The plots below show the superimposed ordinations, once when the target matrix is the transcriptomics and once when it’s the proteomics. The arrows point to the target, black circles are the rotated matrix. The violin plots show the distribution of residuals ( sum of squared deviations between the two ordinations) broken down by archetype classes.
There is not much difference between the archetypes in how well their gene expression aligns with their protemics, regardless of which matrix is the target. Most of the times, archetype 1 is slightly less well aligned than the other two, when the target is transcriptomics.
cs.ge <- sqrt(sum(Cge^2)) # centroid size of ge ordination
mean.ge <- colMeans(Cge) # mean of ge ordination
Cge.arch <- as.matrix(Cge.arch)
pars <- expand.grid(c(TRUE,FALSE), c(TRUE,FALSE))[,2:1]
rownames(pars) <- c("TT", "TF", "FT", "FF")
res.ptL <- res.geL <- Carch.ptL <- Carch.geL <- distCl.ptL <- distCl.geL <- list()
# target is protemoics
for (p in rownames(pars)) {
scale=pars[p,1]
symmetric=pars[p,2]
res <- vegan::procrustes(Cpt, Cge, scale=scale, symmetric=symmetric)
# imputed position of archetypes (translated, scaled, and rotated)
ap <- predict.proc(res, Cge.arch, y.cs=cs.ge, y.mean=mean.ge)
# distances between proteomics samples and imputed archetypes
dist <- as.matrix(dist(rbind(ap, res$X), method="euclidean"))[-c(1:3),1:3] %>%
as.data.frame()
# classification of proteomic samples to imputed archetypes
dist$Archetype <- apply(dist, 1, function(x){names(x)[x==min(x)]})
Carch.ptL[[p]] <- ap
res.ptL[[p]] <- res
distCl.ptL[[p]] <- dist
rm(dist, res, ap)
}
# target is transcriptomics
for (p in rownames(pars)) {
scale=pars[p,1]
symmetric=pars[p,2]
res <- vegan::procrustes(Cge, Cpt, scale=scale, symmetric=symmetric)
# aligned position of archetypes (translated, and scaled if symmetric==TRUE)
if(symmetric) {ap <- (Cge.arch-mean.ge)/cs.ge} else {ap <- Cge.arch-mean.ge}
# distances between proteomics samples and aligned archetypes
dist <- as.matrix(dist(rbind(ap, res$X), method="euclidean"))[-c(1:3),1:3] %>%
as.data.frame()
# classification of proteomic samples to aligned archetypes
dist$Archetype <- apply(dist, 1, function(x){names(x)[x==min(x)]})
Carch.geL[[p]] <- ap
res.geL[[p]] <- res
distCl.geL[[p]] <- dist
rm(dist, res, ap)
}
res.pt <- res.ptL[["TT"]]
res.ge <- res.geL[["TT"]]
data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.ptL[["TT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is proteomics")
data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.geL[["TT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is transcriptomics")
res.pt <- res.ptL[["TF"]]
res.ge <- res.geL[["TF"]]
data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.ptL[["TF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is proteomics")
data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.geL[["TF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is transcriptomics")
res.pt <- res.ptL[["FT"]]
res.ge <- res.geL[["FT"]]
data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.ptL[["FT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is proteomics")
data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.geL[["FT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is transcriptomics")
res.pt <- res.ptL[["FF"]]
res.ge <- res.geL[["FF"]]
data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.ptL[["FF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is proteomics")
data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
ggplot() +
geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
geom_label(data=Carch.geL[["FF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
ggtitle("Target is transcriptomics")
Sum of squared deviations between patients position in proteomics and transcriptomics after superimposition.
res.pt <- res.ptL[["TT"]]
res.ge <- res.geL[["TT"]]
# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.ge)) %>%
mutate(Target="Transcriptomics"),
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.pt)) %>%
mutate(Target="Proteomics")
)
# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
geom_violin(trim=FALSE) +
geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
xlab(NULL)
res.pt <- res.ptL[["TF"]]
res.ge <- res.geL[["TF"]]
# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.ge)) %>%
mutate(Target="Transcriptomics"),
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.pt)) %>%
mutate(Target="Proteomics")
)
# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
geom_violin(trim=FALSE) +
geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
xlab(NULL)
# plot residuals by archetype
ggplot(filter(PTdist, Target=="Proteomics"), aes(x=Archetype, y=PTdist, fill=Target)) +
geom_violin(trim=FALSE) +
geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
xlab(NULL)
res.pt <- res.ptL[["FT"]]
res.ge <- res.geL[["FT"]]
# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.ge)) %>%
mutate(Target="Transcriptomics"),
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.pt)) %>%
mutate(Target="Proteomics")
)
# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
geom_violin(trim=FALSE) +
geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
xlab(NULL)
res.pt <- res.ptL[["FF"]]
res.ge <- res.geL[["FF"]]
# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.ge)) %>%
mutate(Target="Transcriptomics"),
meta.pt %>%
select(Archetype, rnaseq_id) %>%
mutate(PTdist=residuals(res.pt)) %>%
mutate(Target="Proteomics")
)
# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
geom_violin(trim=FALSE) +
geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
xlab(NULL)
Distance between each patient from each archetypes in the transcriptomic space compared to their distances in the proteomic space after superimposition, shown below when the target is proteomics and when the target is transcriptomics.
# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["TT"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["TT"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of ge samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
select(rnaseq_id, Archetype_1:Archetype_3) %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is proteomics") +
facet_wrap("dist_from", scale="free_y")
merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is transcriptomics") +
facet_wrap("dist_from", scale="free_y")
# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["TF"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["TF"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of rnaseq samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
select(rnaseq_id, Archetype_1:Archetype_3) %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is proteomics") +
facet_wrap("dist_from", scale="free_y")
merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is transcriptomics") +
facet_wrap("dist_from", scale="free_y")
# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["FT"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["FT"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of rnaseq samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
select(rnaseq_id, Archetype_1:Archetype_3) %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is proteomics") +
facet_wrap("dist_from", scale="free_y")
merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is transcriptomics") +
facet_wrap("dist_from", scale="free_y")
# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["FF"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["FF"]] %>%
select(-Archetype) %>%
rownames_to_column("rnaseq_id") %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")
# distance of rnaseq samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
select(rnaseq_id, Archetype_1:Archetype_3) %>%
pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is proteomics") +
facet_wrap("dist_from", scale="free_y")
merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
ggplot() +
geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
ggtitle("Distances from archetypes when target is transcriptomics") +
facet_wrap("dist_from", scale="free_y")
table("Proteomics"=distCl.ptL[["TT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Proteomics")
table("Proteomics"=distCl.geL[["TT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Transcriptomics")
table("Proteomics"=distCl.ptL[["TF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Proteomics")
table("Proteomics"=distCl.geL[["TF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Transcriptomics")
table("Proteomics"=distCl.ptL[["FT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Proteomics")
table("Proteomics"=distCl.geL[["FT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Transcriptomics")
table("Proteomics"=distCl.ptL[["FF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Proteomics")
## Warning in legend(residuals, gpfun, residuals_type): All residuals are zero.
table("Proteomics"=distCl.geL[["FF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>%
vcd::mosaic(split_vertical=TRUE, shade=TRUE,
labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE),
main="Target is Transcriptomics")
For each bootstrapped space, reversing the proteomics PCA for the imputed archetypes and finding their centroid. This creates a “population” of 200 samples for each archetype and 200 samples of “control” (the centroid). This then goes into limma to find logFC for each protein and each archetype relative to control, similar to the way logFC was calculated for gene expression data. ## Reconstructing archetype samples
res <- res.ptL[["FT"]] # procrustes results with proteomics as target
BCL <- arcfit$pch_fits$XC # bootstrapped spaces
k = ncol(BCL[[1]]) # number of archetypes
p = nrow(BCL[[1]]) # number of PC's
nb = length(BCL) # number of bootstrapped replicates
# calculate PC scores for the centroid ("control" samples) in each bootstrapped space
Bcontrol <- sapply(BCL, rowMeans)
# Reorganize bootstrapped scores into one matrix including "control" samples (4*nb x p)
BC <- BCL %>%
unlist() %>%
matrix(nr=p, nc=k*nb) %>%
cbind(Bcontrol) %>%
t()
# impute position of boostrapped archetypes in the proteomics space
BCrot <- predict.proc(res, BC, y.cs=NULL, y.mean=mean.ge)
# project everything back to original proteomics space and translate to original center
# (reversing the pca procedure)
rotM <- PCpt$rotation # rotation matrix (eigenvectors)
mu <- PCpt$center # means of proteomics data by which pc's were centered
BCpt <- BCrot %*% t(rotM[,1:p]) %>% # rotating
scale(center = -mu, scale = FALSE) # translating
rm(res, BCL, BC, Bcontrol)
types.b <- rep(c(archnames, "Control"), each=nb)
# calculate LogFC using limma
lm.res.pt <- logFC.pt <-list()
for (a in archnames) {
type <- types.b[types.b %in% c(a, "Control")]
de.df <- as.data.frame(t( BCpt[types.b %in% c(a, "Control"),] ))
fit <- limma::lmFit(de.df, design=model.matrix(~type))
fit <- limma::eBayes(fit)
tt <- limma::topTable(fit, number=Inf, coef=2)
lm.res.pt[[a]] <- tt
logFC.pt[[a]] <- tt$logFC
rm(tt,fit, de.df, type)
}
logFC.pt$protein <- colnames(BCpt)
logFC.pt <- as.data.frame(logFC.pt)
# visualize with heatmap
ComplexHeatmap::Heatmap(as.matrix(logFC.pt[,-4]),
name="LogFC",
show_row_dend = FALSE,
show_row_names = FALSE,
#row_order = order(logFC.pt[,3])
)
There is some aggregation of gene-protein pairwise correlations by archetype.
Archetype_1 = coral; Archetype_2 = green; Archetype_3 = blue
# Correlations between all proteins and all genes, focusing on the top genes
ge.top <- arch.ge[meta.pt$rnaseq_id,topgenes$Ensembl.ID]
R <- cor(ge.top, data.pt)
rc <- character(nrow(topgenes))
rc <- ifelse(topgenes$Archetype=="Archetype_1", "coral3",
ifelse(topgenes$Archetype=="Archetype_2", "green", "blue"))
heatmap(R, RowSideColors = rc)
Match gene names with protein names
genes <- LogFC.ge[[1]] %>% select(HGNC.ID) %>% drop_na(HGNC.ID) %>% pull()
gp <- c()
for (gn in genes) {
gpv <- grep(paste0("^\\d*[;|\\|]*", gn, "[;|\\|]"), colnames(data.pt), value=TRUE)
if (length(gpv) > 0) {
gp <- rbind(gp,
cbind("gene"=gn, "protein"=gpv))
} else {
gp <- rbind(gp,
cbind("gene"=gn, "protein"=NA_character_))
}
}
gp <- as.data.frame(gp)
6 of the genes match two proteins each (instead of one)
cbind(gp[duplicated(gp[,"gene"], incomparables = NA_character_),],
"protein2"=gp[duplicated(gp[,"gene"], incomparables = NA_character_, fromLast = TRUE),"protein"])
## gene protein protein2
## 6473 CSNK2A1 CSNK2A1; CSNK2A1P; CSNK2A3|Q8NEV1 <NA>
## 6751 RABGAP1L RABGAP1L|B7ZAP0 CSNK2A1|P68400
## 7981 POLR2J4 <NA> RABGAP1L|Q5R372
## 8395 NACA NACA|E9PAV3 NACA|Q13765
## 9820 AKAP7 AKAP7|Q9P0M2 AKAP7|O43687
## 11456 HLA-A HLA-A|P13746 HLA-A; LOC100507703|P01892
## 12843 PDPK1 PDPK1|Q6A1A2 PDPK1|O15530
Merge protein names with archetypes topgenes
gpp <-topgenes %>%
select(HGNC.ID, Archetype) %>%
filter(!is.na(HGNC.ID)) %>%
rename(gene=HGNC.ID) %>%
merge(gp, by="gene", all=TRUE, incomparables=NA, sort=FALSE) %>%
merge(select(logFC.pt, protein), by="protein", all=TRUE, incomparables=NA, sort=FALSE) %>%
mutate(Archetype=replace_na(Archetype, "None"))
#%>%
# mutate("InOut"=if_else(is.na(gene), "Excluded", "Included"))
Number of proteins that are (or are not) included in the gene sets, and number of genes in each set that are included in the proteomic dataset
gpp %>%
mutate(protein=if_else(is.na(protein), "Out", "In"),
gene=if_else(is.na(gene), "Out", "In")) %>%
# table() %>%
ggplot(mapping=aes(x=gene, fill=protein)) +
geom_bar(position = "fill") +
geom_text(aes(label = ..count..), stat = "count", position = "fill", vjust=1.5) +
facet_wrap(~Archetype)
gp <- gpp %>%
filter(!is.na(protein) & !is.na(gene)) %>%
merge(select(LogFC.ge[[1]], HGNC.ID, Ensembl.ID),
by.x="gene", by.y="HGNC.ID", all.x=TRUE, all.y=FALSE, sort=FALSE, incomparables = NA) %>%
mutate_all(as.character)
R.gp <- cor(arch.ge[meta.pt$rnaseq_id, gp$Ensembl.ID], data.pt[meta.pt$rnaseq_id, gp$protein]) %>%
diag() %>%
as_tibble_col(column_name="ge.pt.cor") %>%
mutate(gene=gp$gene)
gp <- merge(gp, R.gp, by="gene")
ggplot(filter(gp, Archetype!="None"), aes(x=Archetype, y=ge.pt.cor)) +
geom_boxplot()
lfc.ge <- rbind(cbind(LogFC.ge[[1]], Archetype="Archetype_1"),
cbind(LogFC.ge[[2]], Archetype="Archetype_2"),
cbind(LogFC.ge[[3]], Archetype="Archetype_3")) %>%
filter(!is.na(HGNC.ID)) %>%
select(gene=HGNC.ID, Archetype, logFC.ge=logFC)
lfc.pt <- logFC.pt %>%
pivot_longer(-protein, names_to = "Archetype", values_to = "logFC.pt")
gp <- merge(gp, lfc.pt, by=c("protein", "Archetype"), all.x = TRUE, all.y=FALSE) %>%
merge(lfc.ge, by=c("gene", "Archetype"), all.x = TRUE, all.y=FALSE)
filter(gp, Archetype != "None") %>%
ggplot(aes(x=logFC.ge, y=logFC.pt, color=Archetype)) +
geom_point() +
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
Pairwise distances between samples are preserved in Procrustes superimposition but not in Canonical Correlations.
cca <- CCorA(Cge, Cpt)
pwd.ge.cc <- as.matrix(dist(cca$Cy))
pwd.pt.cc <- as.matrix(dist(cca$Cx))
pwd.ge <- as.matrix(dist(Cge))
pwd.pt <- as.matrix(dist(Cpt))
pwd.ge.ps <- as.matrix(dist(res.ptL[["FT"]]$Yrot))
pwd.pt.ps <- as.matrix(dist(res.geL[["FT"]]$Yrot))
df <- data.frame(pwd.ge = pwd.ge[upper.tri(pwd.ge)],
pwd.pt = pwd.pt[upper.tri(pwd.pt)],
pwd.ge.cc = pwd.ge.cc[upper.tri(pwd.ge.cc)],
pwd.pt.cc = pwd.pt.cc[upper.tri(pwd.pt.cc)],
pwd.ge.ps = pwd.ge.ps[upper.tri(pwd.ge.ps)],
pwd.pt.ps = pwd.pt.ps[upper.tri(pwd.pt.ps)])
ggpairs(df)
ggplot(df) +
geom_point(aes(x=pwd.ge, y=pwd.ge.ps)) +
ggtitle("Pairwise distances between transcriptomic samples \n before and after Procrustes superimposition") +
xlab("Original PCA distances") +
ylab("Superimposed distances")
ggplot(df) +
geom_point(aes(x=pwd.pt, y=pwd.pt.ps)) +
ggtitle("Pairwise distances between proteomic samples \n before and after Procrustes superimposition") +
xlab("Original PCA distances") +
ylab("Superimposed distances")
ggplot(df) +
geom_point(aes(x=pwd.ge, y=pwd.ge.cc)) +
ggtitle("Pairwise distances between transcriptomic samples in PCA vs CCA") +
xlab("Original PCA distances") +
ylab("CCA distances")
ggplot(df) +
geom_point(aes(x=pwd.pt, y=pwd.pt.cc)) +
ggtitle("Pairwise distances between proteomic samples in PCA vs CCA") +
xlab("Original PCA distances") +
ylab("CCA distances")
outpath <- "analyses/rnaseq/6_proteomics_integration/"
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.2 (2019-12-12)
## os macOS Mojave 10.14.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-09-22
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
## backports 1.1.6 2020-04-05 [1] CRAN (R 3.6.2)
## broom 0.5.5 2020-02-29 [1] CRAN (R 3.6.0)
## callr 3.4.3 2020-03-28 [1] CRAN (R 3.6.2)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.6.0)
## circlize 0.4.8 2019-09-08 [1] CRAN (R 3.6.0)
## cli 2.0.2 2020-02-28 [1] CRAN (R 3.6.0)
## clue 0.3-57 2019-02-25 [1] CRAN (R 3.6.0)
## cluster 2.1.0 2019-06-19 [1] CRAN (R 3.6.1)
## codetools 0.2-16 2018-12-24 [1] CRAN (R 3.6.1)
## colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.0)
## ComplexHeatmap 2.2.0 2019-10-29 [1] Bioconductor
## cowplot * 1.0.0 2019-07-11 [1] CRAN (R 3.6.0)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
## DBI 1.1.0 2019-12-15 [1] CRAN (R 3.6.0)
## dbplyr 1.4.2 2019-06-17 [1] CRAN (R 3.6.0)
## desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.0)
## devtools 2.3.0 2020-04-10 [1] CRAN (R 3.6.1)
## digest 0.6.25 2020-02-23 [1] CRAN (R 3.6.0)
## dplyr * 0.8.5 2020-03-07 [1] CRAN (R 3.6.0)
## ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.0)
## farver 2.0.3 2020-01-16 [1] CRAN (R 3.6.0)
## forcats * 0.5.0 2020-03-01 [1] CRAN (R 3.6.0)
## fs 1.4.1 2020-04-04 [1] CRAN (R 3.6.2)
## generics 0.0.2 2018-11-29 [1] CRAN (R 3.6.0)
## GetoptLong 0.1.8 2020-01-08 [1] CRAN (R 3.6.0)
## GGally * 1.5.0 2020-03-25 [1] CRAN (R 3.6.0)
## ggfortify * 0.4.9 2020-03-11 [1] CRAN (R 3.6.0)
## ggplot2 * 3.3.0 2020-03-05 [1] CRAN (R 3.6.0)
## GlobalOptions 0.1.1 2019-09-30 [1] CRAN (R 3.6.0)
## glue 1.4.0 2020-04-03 [1] CRAN (R 3.6.2)
## gridExtra 2.3 2017-09-09 [1] CRAN (R 3.6.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0)
## haven 2.2.0 2019-11-08 [1] CRAN (R 3.6.0)
## hms 0.5.3 2020-01-08 [1] CRAN (R 3.6.0)
## htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0)
## httr 1.4.1 2019-08-05 [1] CRAN (R 3.6.0)
## jsonlite 1.6.1 2020-02-02 [1] CRAN (R 3.6.0)
## knitr 1.28 2020-02-06 [1] CRAN (R 3.6.0)
## labeling 0.3 2014-08-23 [1] CRAN (R 3.6.0)
## lattice * 0.20-41 2020-04-02 [1] CRAN (R 3.6.2)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 3.6.0)
## limma 3.42.2 2020-02-03 [1] Bioconductor
## lmtest 0.9-37 2019-04-30 [1] CRAN (R 3.6.0)
## lubridate 1.7.8 2020-04-06 [1] CRAN (R 3.6.2)
## magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
## MASS 7.3-51.5 2019-12-20 [1] CRAN (R 3.6.1)
## Matrix * 1.2-18 2019-11-27 [1] CRAN (R 3.6.0)
## memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.0)
## mgcv 1.8-31 2019-11-09 [1] CRAN (R 3.6.0)
## modelr 0.1.6 2020-02-22 [1] CRAN (R 3.6.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0)
## nlme 3.1-145 2020-03-04 [1] CRAN (R 3.6.0)
## permute * 0.9-5 2019-03-12 [1] CRAN (R 3.6.0)
## pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.1)
## pins * 0.4.1 2020-05-28 [1] CRAN (R 3.6.2)
## pkgbuild 1.0.6 2019-10-09 [1] CRAN (R 3.6.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0)
## pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.6.0)
## plyr 1.8.6 2020-03-03 [1] CRAN (R 3.6.0)
## png 0.1-7 2013-12-03 [1] CRAN (R 3.6.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 3.6.0)
## processx 3.4.2 2020-02-09 [1] CRAN (R 3.6.0)
## ps 1.3.2 2020-02-13 [1] CRAN (R 3.6.0)
## purrr * 0.3.3 2019-10-18 [1] CRAN (R 3.6.0)
## R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.0)
## RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 3.6.0)
## Rcpp 1.0.4.6 2020-04-09 [1] CRAN (R 3.6.1)
## readr * 1.3.1 2018-12-21 [1] CRAN (R 3.6.0)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 3.6.0)
## remotes 2.1.1 2020-02-15 [1] CRAN (R 3.6.0)
## reprex 0.3.0 2019-05-16 [1] CRAN (R 3.6.0)
## reshape 0.8.8 2018-10-23 [1] CRAN (R 3.6.0)
## rjson 0.2.20 2018-06-08 [1] CRAN (R 3.6.0)
## rlang 0.4.5 2020-03-01 [1] CRAN (R 3.6.0)
## rmarkdown 2.1 2020-01-20 [1] CRAN (R 3.6.0)
## rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.0)
## rstudioapi 0.11 2020-02-07 [1] CRAN (R 3.6.0)
## rvest 0.3.5 2019-11-08 [1] CRAN (R 3.6.0)
## scales 1.1.0 2019-11-18 [1] CRAN (R 3.6.1)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
## shape 1.4.4 2018-02-07 [1] CRAN (R 3.6.0)
## stringi 1.4.6 2020-02-17 [1] CRAN (R 3.6.0)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
## testthat 2.3.2 2020-03-02 [1] CRAN (R 3.6.0)
## tibble * 3.0.0 2020-03-30 [1] CRAN (R 3.6.2)
## tidyr * 1.0.2 2020-01-24 [1] CRAN (R 3.6.0)
## tidyselect 1.0.0 2020-01-27 [1] CRAN (R 3.6.0)
## tidyverse * 1.3.0 2019-11-21 [1] CRAN (R 3.6.0)
## usethis 1.6.0 2020-04-09 [1] CRAN (R 3.6.1)
## vcd 1.4-7 2020-04-02 [1] CRAN (R 3.6.2)
## vctrs 0.2.4 2020-03-10 [1] CRAN (R 3.6.0)
## vegan * 2.5-6 2019-09-01 [1] CRAN (R 3.6.0)
## withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0)
## xfun 0.12 2020-01-13 [1] CRAN (R 3.6.0)
## xml2 1.3.1 2020-04-09 [1] CRAN (R 3.6.2)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.0)
## zoo 1.8-7 2020-01-10 [1] CRAN (R 3.6.0)
##
## [1] /Users/habera/Dropbox/ampad_archetypes/renv/library/R-3.6/x86_64-apple-darwin15.6.0
## [2] /private/var/folders/ls/45q5krvs5xd4m27ks7cmhy80g4hpwp/T/Rtmpp6Mgpo/renv-system-library
## [3] /Library/Frameworks/R.framework/Versions/3.6/Resources/library